# coding: utf-8
import json
import os

# Build a plain-text corpus from the raw JSON poem files: every paragraph of
# every poem is appended, with punctuation removed, to one text file named
# after the collection.
Poems_type_name = '全唐诗'
前缀 = "唐"  # only input files whose name starts with this prefix are used
文件类型 = ".json"  # only input files whose name ends with this suffix are used
# Root folder of the raw datasets
folder_path = './row_datasets/'
# Collect every file in the collection folder matching the prefix and suffix
# above. os.listdir() returns names in arbitrary order, so sort them to make
# the order of the generated corpus reproducible between runs.
json_files = sorted(
    f for f in os.listdir(os.path.join(folder_path, Poems_type_name))
    if f.startswith(前缀) and f.endswith(文件类型)
)
xd_path = './row_datasets/'+Poems_type_name
# Text is appended to this file, so re-running the script extends it.
output_path = "datasets/"+Poems_type_name+".txt"
# Make sure the output folder exists before the first write.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Translation table that deletes the punctuation marks which must not end up
# in the corpus; built once and applied in a single pass per paragraph.
_PUNCTUATION_TABLE = str.maketrans("", "", "。，、…《》？）（！")
# Process the input files one by one
for file_path in json_files:
    # The input is only read, so plain read mode is enough (no write access
    # to the raw dataset is required).
    with open(os.path.join(xd_path, file_path), 'r', encoding='utf-8') as file:
        json_data = json.load(file)
    # Open the output once per input file instead of once per poem.
    with open(output_path, 'a', encoding='utf-8') as f:
        for x in json_data:
            for i in x['paragraphs']:
                # No separator is written between paragraphs or poems.
                f.write("".join(i).translate(_PUNCTUATION_TABLE))
    print(file_path+":处理完成")






